Chante Bethell for CCDL 2019
This notebook creates oncoprints given the relevant metadata and output MAF files from the SNV callers strelka2, mutect2, lancet, and vardict. It addresses issue #6 in OpenPBTA.
Output Files
analyses/oncoprint-landscape/plots/combined_maf_oncoprint.png
analyses/oncoprint-landscape/plots/strelka2_oncoprint.png
analyses/oncoprint-landscape/plots/mutect2_oncoprint.png
analyses/oncoprint-landscape/plots/lancet_oncoprint.png
analyses/oncoprint-landscape/plots/vardict_oncoprint.png
Usage
This script is intended to be run via the command line from the top directory of the repository as follows:
Rscript -e "rmarkdown::render('analyses/oncoprint-landscape/01-plot-oncoprint.Rmd',
clean = TRUE)"
Set Up
# Install maftools from Bioconductor if it is not available.
# `requireNamespace()` is used for the check (as for BiocManager below) rather
# than scanning the full `installed.packages()` matrix, which is slow and is
# not intended for testing whether a single package is installed.
if (!requireNamespace("maftools", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("maftools")
}
library(maftools)

# Get `magrittr` pipe without attaching all of dplyr
`%>%` <- dplyr::`%>%`
# Named color vector for the oncoprints, keyed by variant classification
# (passed to the `colors` argument of `oncoplot`).
# Every classification gets its own color: "Nonstop_Mutation" previously
# reused "#56B4E9" from "Frame_Shift_Del", which made the two classes
# indistinguishable in the plot and legend.
colores <- c(
  "Missense_Mutation" = "#35978f",
  "Nonsense_Mutation" = "#191970",
  "Frame_Shift_Del" = "#56B4E9",
  "Frame_Shift_Ins" = "#FFBBFF",
  "Splice_Site" = "#F0E442",
  "Nonstop_Mutation" = "#545454",
  "In_Frame_Del" = "#CAE1FF",
  "In_Frame_Ins" = "#FFE4E1",
  "Multi_Hit" = "#f46d43"
)
Create function to plot oncoprints
plot_oncoplot <- function(maf_file, filename) {
  # Given a MAF object and a filename, plot an oncoprint of the variants in
  # the dataset, save it as a png file in `plots_dir`, and display it.
  #
  # Args:
  #   maf_file: MAF object to plot (the callers in this notebook pass the
  #             objects returned by `read.maf`)
  #   filename: name to save the png file as, relative to `plots_dir`
  #
  # Return:
  #   the result of `knitr::include_graphics` for the saved png, so that the
  #   image is embedded when the call is evaluated in a notebook chunk
  #
  # Relies on `plots_dir` and `colores` being defined in the calling
  # environment.

  output_path <- file.path(plots_dir, filename)

  # Plot and save the oncoprint
  png(output_path, width = 60, height = 30, units = "cm", res = 300)
  png_device <- dev.cur()

  # Close the png device even when `oncoplot` fails, so that an error does not
  # leave an open device behind that swallows later plots.
  tryCatch(
    oncoplot(
      maf_file,
      clinicalFeatures = c(
        "broad_histology",
        "short_histology",
        "reported_gender",
        "tumor_descriptor"
      ),
      logColBar = TRUE,
      sortByAnnotation = TRUE,
      showTumorSampleBarcodes = TRUE,
      removeNonMutated = FALSE,
      annotationFontSize = 0.7,
      SampleNamefontSize = 0.5,
      fontSize = 0.7,
      colors = colores
    ),
    finally = dev.off(which = png_device)
  )

  # Display saved oncoprint
  knitr::include_graphics(output_path)
}
Directories and Files
# Output directory for the plots produced here; created when it is missing
plots_dir <- "plots"
if (!dir.exists(plots_dir)) {
  dir.create(plots_dir)
}

# Directory holding the data obtained via `bash download-data.sh`
data_dir <- file.path("..", "..", "data")
Read in data
# Load the histologies metadata and rename the biospecimen ID column to
# `Tumor_Sample_Barcode`, the column name used for the maftools functions
metadata <- dplyr::rename(
  readr::read_tsv(file.path(data_dir, "pbta-histologies.tsv")),
  "Tumor_Sample_Barcode" = "Kids_First_Biospecimen_ID"
)

# Read each MAF file into a named list, attaching the metadata as clinical
# data. The list names identify the caller each MAF came from.
# NOTE(review): the combined MAF is read from a local `data` directory rather
# than `data_dir` -- confirm this is intentional.
maf_list <- lapply(
  c(
    strelka2 = file.path(data_dir, "pbta-snv-strelka2.vep.maf.gz"),
    mutect2 = file.path(data_dir, "pbta-snv-mutect2.vep.maf.gz"),
    lancet = file.path(data_dir, "pbta-snv-lancet.vep.maf.gz"),
    vardict = file.path(data_dir, "pbta-snv-vardict.vep.maf.gz"),
    combined = file.path("data", "pbta-snv-combined.vep.maf.gz")
  ),
  function(maf_path) {
    read.maf(maf_path, clinicalData = metadata, verbose = FALSE)
  }
)
Plot, Display, and Save Oncoprints
# Combined MAF: this file was assembled beforehand on the command line with
# `cat` from the strelka2, mutect2, lancet, and vardict output.
plot_oncoplot(maf_list[["combined"]], "combined_maf_oncoprint.png")

# Strelka2 calls
plot_oncoplot(maf_list[["strelka2"]], "strelka2_oncoprint.png")

# Mutect2 calls
plot_oncoplot(maf_list[["mutect2"]], "mutect2_oncoprint.png")

# Lancet calls
plot_oncoplot(maf_list[["lancet"]], "lancet_oncoprint.png")

# Vardict calls
plot_oncoplot(maf_list[["vardict"]], "vardict_oncoprint.png")

Session Info
# Print the R session details for this run
sessionInfo()
LS0tCnRpdGxlOiAiT25jb3ByaW50cyIKb3V0cHV0OiAgIAogIGh0bWxfbm90ZWJvb2s6IAogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKLS0tCgpDaGFudGUgQmV0aGVsbCBmb3IgQ0NETCAyMDE5CgpUaGlzIG5vdGVib29rcyBjcmVhdGVzIG9uY29wcmludHMgZ2l2ZW4gdGhlIHJlbGV2YW50IG1ldGFkYXRhIGFuZCBvdXRwdXQgTUFGIApmaWxlcyBmcm9tIHRoZSBzbnYgY2FsbGVycyBzdHJlbGthMiwgbXV0ZWN0MiwgbGFuY2V0LCBhbmQgdmFyZGljdC4gSXQgCmFkZHJlc3NlcyBbaXNzdWUgIzYgaW4gT3BlblBCVEFdKGh0dHBzOi8vZ2l0aHViLmNvbS9BbGV4c0xlbW9uYWRlL09wZW5QQlRBLWFuYWx5c2lzL2lzc3Vlcy82KS4KCiMjIE91dHB1dCBGaWxlcwoKLSBgYW5hbHlzZXMvb25jb3ByaW50LWxhbmRzY2FwZS9wbG90cy9jb21iaW5lZF9tYWZfb25jb3ByaW50LnBuZ2AKLSBgYW5hbHlzZXMvb25jb3ByaW50LWxhbmRzY2FwZS9wbG90cy9zdHJlbGthMl9vbmNvcHJpbnQucG5nYAotIGBhbmFseXNlcy9vbmNvcHJpbnQtbGFuZHNjYXBlL3Bsb3RzL211dGVjdDJfb25jb3ByaW50LnBuZ2AKLSBgYW5hbHlzZXMvb25jb3ByaW50LWxhbmRzY2FwZS9wbG90cy9sYW5jZXRfb25jb3ByaW50LnBuZ2AKLSBgYW5hbHlzZXMvb25jb3ByaW50LWxhbmRzY2FwZS9wbG90cy92YXJkaWN0X29uY29wcmludC5wbmdgCgojIFVzYWdlCgpUaGlzIHNjcmlwdCBpcyBpbnRlbmRlZCB0byBiZSBydW4gdmlhIHRoZSBjb21tYW5kIGxpbmUgZnJvbSB0aGUgdG9wIGRpcmVjdG9yeQpvZiB0aGUgcmVwb3NpdG9yeSBhcyBmb2xsb3dzOgoKYGBgClJzY3JpcHQgLWUgInJtYXJrZG93bjo6cmVuZGVyKCdhbmFseXNlcy9vbmNvcHJpbnQtbGFuZHNjYXBlLzAxLXBsb3Qtb25jb3ByaW50LlJtZCcsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjbGVhbiA9IFRSVUUpIgpgYGAKCiMgU2V0IFVwCgpgYGB7ciwgd2FybmluZyA9IEZBTFNFfQojIEluc3RhbGwgbWFmdG9vbHMKaWYgKCEoIm1hZnRvb2xzIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygpKSkgewogIGlmICghcmVxdWlyZU5hbWVzcGFjZSgiQmlvY01hbmFnZXIiLCBxdWlldGx5ID0gVFJVRSkpIHsKICAgIGluc3RhbGwucGFja2FnZXMoIkJpb2NNYW5hZ2VyIikKICB9CiAgQmlvY01hbmFnZXI6Omluc3RhbGwoIm1hZnRvb2xzIikKfQpsaWJyYXJ5KG1hZnRvb2xzKQoKIyBHZXQgYG1hZ3JpdHRyYCBwaXBlCmAlPiVgIDwtIGRwbHlyOjpgJT4lYAoKIyBEZWZpbmUgYSBjb2xvciB2ZWN0b3IgZm9yIHBsb3RzIApjb2xvcmVzID0gYygiTWlzc2Vuc2VfTXV0YXRpb24iID0gIiMzNTk3OGYiLCAKICAgICAgICAiTm9uc2Vuc2VfTXV0YXRpb24iID0gIiMxOTE5NzAiLAogICAgICAgICJGcmFtZV9TaGlmdF9EZWwiID0gIiM1NkI0RTkiLCAKICAgICAgICAiRnJhbWVfU2hpZnRfSW5zIiA9ICIjRkZCQkZGIiwgCiAgICAgICAgIlNwbGljZV9TaXRlIiA9ICIjRjBFNDQyIiwKICAgICAgICAiTm9u
c3RvcF9NdXRhdGlvbiIgPSAiIzU2QjRFOSIsCiAgICAgICAgIkluX0ZyYW1lX0RlbCIgPSAiI0NBRTFGRiIsCiAgICAgICAgIkluX0ZyYW1lX0lucyIgPSAiI0ZGRTRFMSIsCiAgICAgICAgIk11bHRpX0hpdCIgPSAiI2Y0NmQ0MyIpCmBgYAoKIyBDcmVhdGUgZnVuY3Rpb24gdG8gcGxvdCBvbmNvcHJpbnRzCgpgYGB7cn0KcGxvdF9vbmNvcGxvdCA8LSBmdW5jdGlvbihtYWZfZmlsZSwgZmlsZW5hbWUpewogICMgR2l2ZW4gYSBtYWYgZmlsZSBhbmQgYSBmaWxlbmFtZSwgcGxvdCBhbiBvbmNvcHJpbnQgb2YgdGhlIHZhcmlhbnRzIGluIHRoZSAKICAjIGRhdGFzZXQsIHNhdmUgYXMgYSBwbmcgZmlsZSBhbmQgZGlzcGxheS4KICAjIEFyZ3M6CiAgIyAgIG1hZl9maWxlOiBuYW1lIG9yIHBhdGggdG8gYSBtYWYgZmlsZQogICMgICBmaWxlbmFtZTogbmFtZSB0byBzYXZlIHRoZSBwbmcgZmlsZSBhcyAKICAjIFJldHVybjoKICAjICAgb25jb3ByaW50OiBwbG90IHByb2R1Y2VkIHVzaW5nIHRoZSBtYWZ0b29scyBgb25jb3Bsb3RgIGZ1bmN0aW9uCiAgCiAgIyBQbG90IGFuZCBzYXZlIHRoZSBvbmNvcHJpbnQgCiAgcG5nKGZpbGUucGF0aChwbG90c19kaXIsIGZpbGVuYW1lKSwgd2lkdGggPSA2MCwgaGVpZ2h0ID0gMzAsIHVuaXRzID0gImNtIiwgcmVzID0gMzAwKQogIG9uY29wbG90KG1hZl9maWxlLAogIGNsaW5pY2FsRmVhdHVyZXMgPSBjKAogICAgImJyb2FkX2hpc3RvbG9neSIsCiAgICAic2hvcnRfaGlzdG9sb2d5IiwKICAgICJyZXBvcnRlZF9nZW5kZXIiLAogICAgInR1bW9yX2Rlc2NyaXB0b3IiCiAgKSwKICBsb2dDb2xCYXIgPSBUUlVFLAogIHNvcnRCeUFubm90YXRpb24gPSBUUlVFLAogIHNob3dUdW1vclNhbXBsZUJhcmNvZGVzID0gVFJVRSwKICByZW1vdmVOb25NdXRhdGVkID0gRkFMU0UsCiAgYW5ub3RhdGlvbkZvbnRTaXplID0gMC43LAogIFNhbXBsZU5hbWVmb250U2l6ZSA9IDAuNSwKICBmb250U2l6ZSA9IDAuNywKICBjb2xvcnMgPSBjb2xvcmVzCiAgKQogIGRldi5vZmYoKQogIAogICMgRGlzcGxheSBzYXZlZCBvbmNvcHJpbnQKICBrbml0cjo6aW5jbHVkZV9ncmFwaGljcyhmaWxlLnBhdGgocGxvdHNfZGlyLCBmaWxlbmFtZSkpCn0KYGBgCgojIERpcmVjdG9yaWVzIGFuZCBGaWxlcwoKYGBge3J9CiMgUGF0aCB0byB0aGUgZGF0YSBvYnRhaW5lZCB2aWEgYGJhc2ggZG93bmxvYWQtZGF0YS5zaGAuCmRhdGFfZGlyIDwtIGZpbGUucGF0aCgiLi4iLCAiLi4iLCAiZGF0YSIpCgojIFBhdGggdG8gb3V0cHV0IGRpcmVjdG9yeSBmb3IgcGxvdHMgcHJvZHVjZWQKcGxvdHNfZGlyIDwtICJwbG90cyIKCmlmICghZGlyLmV4aXN0cyhwbG90c19kaXIpKSB7CiAgZGlyLmNyZWF0ZShwbG90c19kaXIpCn0KYGBgCgoKIyBSZWFkIGluIGRhdGEKCmBgYHtyLCBtZXNzYWdlID0gRkFMU0UsIHdhcm5pbmcgPSBGQUxTRSwgcmVzdWx0cyA9ICJoaWRlIn0KIyBSZWFkIGluIG1ldGFkYXRhCm1ldGFkYXRhIDwtIHJlYWRyOjpyZWFkX3Rzdihm
aWxlLnBhdGgoZGF0YV9kaXIsICJwYnRhLWhpc3RvbG9naWVzLnRzdiIpKQoKIyBSZW5hbWUgZm9yIG1hZnRvb2xzIGZ1bmN0aW9uCm1ldGFkYXRhIDwtIG1ldGFkYXRhICU+JQogIGRwbHlyOjpyZW5hbWUoIlR1bW9yX1NhbXBsZV9CYXJjb2RlIiA9ICJLaWRzX0ZpcnN0X0Jpb3NwZWNpbWVuX0lEIikKCiMgUmVhZCBtYWYgZmlsZXMgaW50byBhIGxpc3QKbWFmX2xpc3QgPC0gbGlzdCgKICBzdHJlbGthMiA9IHJlYWQubWFmKAogICAgZmlsZS5wYXRoKGRhdGFfZGlyLCAicGJ0YS1zbnYtc3RyZWxrYTIudmVwLm1hZi5neiIpLAogICAgY2xpbmljYWxEYXRhID0gbWV0YWRhdGEsCiAgICB2ZXJib3NlID0gRkFMU0UKICApLAogIG11dGVjdDIgPSByZWFkLm1hZigKICAgIGZpbGUucGF0aChkYXRhX2RpciwgInBidGEtc252LW11dGVjdDIudmVwLm1hZi5neiIpLAogICAgY2xpbmljYWxEYXRhID0gbWV0YWRhdGEsCiAgICB2ZXJib3NlID0gRkFMU0UKICApLAogIGxhbmNldCA9IHJlYWQubWFmKAogICAgZmlsZS5wYXRoKGRhdGFfZGlyLCAicGJ0YS1zbnYtbGFuY2V0LnZlcC5tYWYuZ3oiKSwKICAgIGNsaW5pY2FsRGF0YSA9IG1ldGFkYXRhLAogICAgdmVyYm9zZSA9IEZBTFNFCiAgKSwKICB2YXJkaWN0ID0gcmVhZC5tYWYoCiAgICBmaWxlLnBhdGgoZGF0YV9kaXIsICJwYnRhLXNudi12YXJkaWN0LnZlcC5tYWYuZ3oiKSwKICAgIGNsaW5pY2FsRGF0YSA9IG1ldGFkYXRhLAogICAgdmVyYm9zZSA9IEZBTFNFCiAgKSwKICBjb21iaW5lZCA9IHJlYWQubWFmKAogICAgZmlsZS5wYXRoKCJkYXRhIiwgInBidGEtc252LWNvbWJpbmVkLnZlcC5tYWYuZ3oiKSwKICAgIGNsaW5pY2FsRGF0YSA9IG1ldGFkYXRhLAogICAgdmVyYm9zZSA9IEZBTFNFCiAgKQopCmBgYAoKIyBQbG90LCBEaXNwbGF5LCBhbmQgU2F2ZSBPbmNvcHJpbnRzCgpgYGB7ciBvdXQud2lkdGggPSAiMTUlIn0KIyBQbG90IG9uY29wcmludCB1c2luZyBjb21iaW5lZCBtYWYgb3V0cHV0IGRhdGEgYW5kIHNhdmUuIFRoaXMgY29tYmluZWQgZGF0YSAKIyB3YXMgY3JlYXRlZCB1c2luZyB0aGUgYGNhdGAgZnVuY3Rpb24gdmlhIGNvbW1hbmQgbGluZSB0byBjb21iaW5lIHRoZSBkYXRhIGZyb20KIyBzdHJlbGthMiwgbXV0ZWN0MiwgbGFuY2V0LCBhbmQgdmFyZGljdC4KcGxvdF9vbmNvcGxvdChtYWZfbGlzdCRjb21iaW5lZCwgImNvbWJpbmVkX21hZl9vbmNvcHJpbnQucG5nIikKCiMgUGxvdCBvbmNvcHJpbnQgdXNpbmcgU3RyZWxrYTIgb3V0cHV0IGRhdGEgYW5kIHNhdmUKcGxvdF9vbmNvcGxvdChtYWZfbGlzdCRzdHJlbGthMiwgInN0cmVsa2EyX29uY29wcmludC5wbmciKQoKIyBQbG90IG9uY29wcmludCB1c2luZyBNdXRlY3QyIG91dHB1dCBkYXRhIGFuZCBzYXZlCnBsb3Rfb25jb3Bsb3QobWFmX2xpc3QkbXV0ZWN0MiwgIm11dGVjdDJfb25jb3ByaW50LnBuZyIpCgojIFBsb3Qgb25jb3ByaW50IHVzaW5nIExhbmNldCBvdXRwdXQgZGF0YSBhbmQgc2F2ZQpwbG90X29uY29wbG90
KG1hZl9saXN0JGxhbmNldCwgImxhbmNldF9vbmNvcHJpbnQucG5nIikKCiMgUGxvdCBvbmNvcHJpbnQgdXNpbmcgVmFyZGljdCBvdXRwdXQgZGF0YSBhbmQgc2F2ZQpwbG90X29uY29wbG90KG1hZl9saXN0JHZhcmRpY3QsICJ2YXJkaWN0X29uY29wcmludC5wbmciKQpgYGAKCiMgU2Vzc2lvbiBJbmZvCmBgYHtyfQpzZXNzaW9uSW5mbygpCmBgYAoK